In [ ]:
import pandas as pd
from codefiles.datagen import x_plus_noise
from codefiles.dataplot import plot_2d
import matplotlib.pyplot as plt
%matplotlib inline

With no randomness, y is a perfect linear function of x.


In [ ]:
# Generate the sample with randomness=0 and plot it; per the accompanying
# text, y should then be an exact linear function of x.
df = x_plus_noise(randomness=0)
plot_2d(df)
plt.show()

# 'pearson' is the default method of pandas' Series.corr; it is spelled out
# here so the coefficient in use is visible.
# NOTE(review): the later cells in this notebook pass method='spearman' --
# confirm that mixing the two coefficients is intentional.
pearson_r = df.x.corr(df.y, method='pearson')
print('perfectly correlated x and y: {}'.format(pearson_r))

Now we add just a bit of noise


In [ ]:
# Same generator as before, now with a small randomness setting (0.1).
df = x_plus_noise(randomness=0.1)
plot_2d(df)
plt.show()

# Spearman (rank-based) correlation between df.x and df.y.
spearman_low_noise = df.x.corr(df.y, method='spearman')
print('correlation when y is a function of x but with a bit of noise: {}'.format(
    spearman_low_noise))

This time let's add a good bit of noise


In [ ]:
# Raise the randomness setting to 1 to see how the coefficient reacts.
df = x_plus_noise(randomness=1)
plot_2d(df)
plt.show()

# Spearman (rank-based) correlation between df.x and df.y.
spearman_high_noise = df.x.corr(df.y, method='spearman')
print('correlation when y is a function of x but with lots of noise: {}'.format(
    spearman_high_noise))

And now let's check out some negative correlation


In [ ]:
# randomness=0 combined with slope=-1; the printed label describes the
# expected outcome as a perfect negative correlation.
df = x_plus_noise(randomness=0, slope=-1)
plot_2d(df)
plt.show()

# Spearman (rank-based) correlation between df.x and df.y.
spearman_negative = df.x.corr(df.y, method='spearman')
print('with slope=-1, perfect negative correlation: {}'.format(
    spearman_negative))

In [ ]: